First, we set up our workspace. Load maftools to analyze the maf files.
Get all the mafs and merge them into one big maf.
library(maftools)
library(tidyverse)
library(synapser)
synLogin()
## Welcome, Robert Allaway!
## NULL
child <- synGetChildren('syn35817218')$asList()
ids <- map(child, "id") %>% unlist()
maf_paths <- lapply(ids, synGet)
##
Downloading [##########----------]47.90% 8.0MB/16.7MB (2.6MB/s) Strelka_28cNF.vep.maf.synapse_download_102758853
Downloading [##########----------]52.10% 8.7MB/16.7MB (2.8MB/s) Strelka_28cNF.vep.maf.synapse_download_102758853
Downloading [####################]100.00% 16.7MB/16.7MB (5.4MB/s) Strelka_28cNF.vep.maf.synapse_download_102758853 Done...
Downloading [###########---------]53.44% 8.0MB/15.0MB (2.3MB/s) Strelka_cNF00.10a.vep.maf.synapse_download_102758854
Downloading [####################]100.00% 15.0MB/15.0MB (4.3MB/s) Strelka_cNF00.10a.vep.maf.synapse_download_102758854 Done...
Downloading [##########----------]50.89% 8.0MB/15.7MB (2.0MB/s) Strelka_cNF04.9a.vep.maf.synapse_download_102758856
Downloading [####################]100.00% 15.7MB/15.7MB (3.9MB/s) Strelka_cNF04.9a.vep.maf.synapse_download_102758856 Done...
Downloading [###-----------------]13.82% 2.6MB/18.6MB (443.6kB/s) Strelka_cNF97.2a.vep.maf.synapse_download_102758857
Downloading [###########---------]56.91% 10.6MB/18.6MB (1.8MB/s) Strelka_cNF97.2a.vep.maf.synapse_download_102758857
Downloading [####################]100.00% 18.6MB/18.6MB (3.1MB/s) Strelka_cNF97.2a.vep.maf.synapse_download_102758857 Done...
Downloading [#########-----------]47.26% 8.0MB/16.9MB (1.6MB/s) Strelka_cNF97.2b.vep.maf.synapse_download_102758861
Downloading [###################-]94.53% 16.0MB/16.9MB (3.3MB/s) Strelka_cNF97.2b.vep.maf.synapse_download_102758861
Downloading [####################]100.00% 16.9MB/16.9MB (3.5MB/s) Strelka_cNF97.2b.vep.maf.synapse_download_102758861 Done...
Downloading [#########-----------]46.91% 8.0MB/17.1MB (2.2MB/s) Strelka_cNF98.4c.vep.maf.synapse_download_102758862
Downloading [###################-]93.81% 16.0MB/17.1MB (4.3MB/s) Strelka_cNF98.4c.vep.maf.synapse_download_102758862
Downloading [####################]100.00% 17.1MB/17.1MB (4.6MB/s) Strelka_cNF98.4c.vep.maf.synapse_download_102758862 Done...
Downloading [########------------]41.11% 8.0MB/19.5MB (1.8MB/s) Strelka_cNF98.4d.vep.maf.synapse_download_102758865
Downloading [################----]82.22% 16.0MB/19.5MB (3.5MB/s) Strelka_cNF98.4d.vep.maf.synapse_download_102758865
Downloading [####################]100.00% 19.5MB/19.5MB (4.3MB/s) Strelka_cNF98.4d.vep.maf.synapse_download_102758865 Done...
Downloading [#########-----------]43.52% 8.0MB/18.4MB (2.0MB/s) Strelka_i28cNF.vep.maf.synapse_download_102758867
Downloading [#################---]87.04% 16.0MB/18.4MB (4.0MB/s) Strelka_i28cNF.vep.maf.synapse_download_102758867
Downloading [####################]100.00% 18.4MB/18.4MB (4.6MB/s) Strelka_i28cNF.vep.maf.synapse_download_102758867 Done...
Downloading [########------------]42.23% 8.0MB/18.9MB (1.8MB/s) Strelka_icNF00.10a.vep.maf.synapse_download_102758869
Downloading [############--------]57.77% 10.9MB/18.9MB (2.4MB/s) Strelka_icNF00.10a.vep.maf.synapse_download_102758869
Downloading [####################]100.00% 18.9MB/18.9MB (4.2MB/s) Strelka_icNF00.10a.vep.maf.synapse_download_102758869 Done...
Downloading [####----------------]18.49% 3.6MB/19.6MB (753.3kB/s) Strelka_icNF04.9a.vep.maf.synapse_download_102758870
Downloading [############--------]59.24% 11.6MB/19.6MB (2.4MB/s) Strelka_icNF04.9a.vep.maf.synapse_download_102758870
Downloading [####################]100.00% 19.6MB/19.6MB (4.0MB/s) Strelka_icNF04.9a.vep.maf.synapse_download_102758870 Done...
Downloading [###-----------------]13.75% 2.6MB/18.6MB (596.1kB/s) Strelka_icNF97.2a.vep.maf.synapse_download_102758872
Downloading [###########---------]56.88% 10.6MB/18.6MB (2.4MB/s) Strelka_icNF97.2a.vep.maf.synapse_download_102758872
Downloading [####################]100.00% 18.6MB/18.6MB (4.2MB/s) Strelka_icNF97.2a.vep.maf.synapse_download_102758872 Done...
Downloading [#########-----------]46.27% 6.9MB/14.9MB (1.9MB/s) Strelka_icNF97.2b.vep.maf.synapse_download_102758875
Downloading [####################]100.00% 14.9MB/14.9MB (4.0MB/s) Strelka_icNF97.2b.vep.maf.synapse_download_102758875 Done...
Downloading [#########-----------]45.87% 8.0MB/17.4MB (1.7MB/s) Strelka_icNF98.4c.vep.maf.synapse_download_102758877
Downloading [###########---------]54.13% 9.4MB/17.4MB (2.0MB/s) Strelka_icNF98.4c.vep.maf.synapse_download_102758877
Downloading [####################]100.00% 17.4MB/17.4MB (3.7MB/s) Strelka_icNF98.4c.vep.maf.synapse_download_102758877 Done...
Downloading [##########----------]51.10% 8.0MB/15.7MB (1.9MB/s) Strelka_icNF98.4d.vep.maf.synapse_download_102758879
Downloading [####################]100.00% 15.7MB/15.7MB (3.7MB/s) Strelka_icNF98.4d.vep.maf.synapse_download_102758879 Done...
mafs <- lapply(maf_paths, function(x){
maftools::read.maf(x$path)
})
## -Reading
## -Validating
## -Silent variants: 17776
## -Summarizing
## --Possible FLAGS among top ten genes:
## AHNAK2
## MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 1.020s elapsed (0.896s cpu)
## -Reading
## -Validating
## -Silent variants: 15636
## -Summarizing
## --Possible FLAGS among top ten genes:
## MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.688s elapsed (0.599s cpu)
## -Reading
## -Validating
## -Silent variants: 16648
## -Summarizing
## --Possible FLAGS among top ten genes:
## MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.657s elapsed (0.563s cpu)
## -Reading
## -Validating
## -Silent variants: 19848
## -Summarizing
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.833s elapsed (0.687s cpu)
## -Reading
## -Validating
## -Silent variants: 17921
## -Summarizing
## --Possible FLAGS among top ten genes:
## MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.791s elapsed (0.666s cpu)
## -Reading
## -Validating
## -Silent variants: 18138
## -Summarizing
## --Possible FLAGS among top ten genes:
## AHNAK2
## MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.473s elapsed (0.378s cpu)
## -Reading
## -Validating
## -Silent variants: 20608
## -Summarizing
## --Possible FLAGS among top ten genes:
## AHNAK2
## MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.611s elapsed (0.488s cpu)
## -Reading
## -Validating
## -Silent variants: 19699
## -Summarizing
## --Possible FLAGS among top ten genes:
## AHNAK2
## MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.877s elapsed (0.746s cpu)
## -Reading
## -Validating
## -Silent variants: 20231
## -Summarizing
## --Possible FLAGS among top ten genes:
## MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.488s elapsed (0.386s cpu)
## -Reading
## -Validating
## -Silent variants: 21032
## -Summarizing
## --Possible FLAGS among top ten genes:
## MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.581s elapsed (0.432s cpu)
## -Reading
## -Validating
## -Silent variants: 19790
## -Summarizing
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.810s elapsed (0.699s cpu)
## -Reading
## -Validating
## -Silent variants: 15573
## -Summarizing
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.560s elapsed (0.380s cpu)
## -Reading
## -Validating
## -Silent variants: 18430
## -Summarizing
## --Possible FLAGS among top ten genes:
## AHNAK2
## MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.728s elapsed (0.628s cpu)
## -Reading
## -Validating
## -Silent variants: 16403
## -Summarizing
## --Possible FLAGS among top ten genes:
## AHNAK2
## MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.421s elapsed (0.388s cpu)
merged_maf <- merge_mafs(mafs)
## Merging 14 MAF objects
## -Validating
## -Silent variants: 257733
## -Summarizing
## --Possible FLAGS among top ten genes:
## AHNAK2
## MUC16
## -Processing clinical data
## -Finished in 2.580s elapsed (2.348s cpu)
First, let’s compare the immortalized vs primary cells to see if there are any significant differentially mutated genes. There do not appear to be (lowest p-value ~0.1 and all p-values = 1 after FDR correction). With more cell lines, we might be able to detect a difference here, but with the current experiment we don’t detect any significantly differentially mutated genes.
primary_mafs <- subsetMaf(merged_maf, tsb = c("28cNF","cNF00.10a","cNF04.9a","cNF97.2a","cNF97.2b","cNF98.4c","cNF98.4d"))
## --Possible FLAGS among top ten genes:
## AHNAK2
## MUC16
## -Processing clinical data
immortalized_mafs <- subsetMaf(merged_maf, tsb = c("i28cNF","icNF00.10a","icNF04.9a","icNF97.2a","icNF97.2b","icNF98.4c","icNF98.4d"))
## --Possible FLAGS among top ten genes:
## AHNAK2
## MUC16
## -Processing clinical data
compare_mafs <- mafCompare(primary_mafs,immortalized_mafs, m1Name = "Primary", m2Name = "Immortalized", minMut = 0)
compare_mafs[1]
## $results
## Hugo_Symbol Primary Immortalized pval or ci.up
## 1: NUP153 5 1 0.1025641 11.717446 817.982011
## 2: TAF4 5 1 0.1025641 11.717446 817.982011
## 3: IFNA4 0 3 0.1923077 0.000000 2.166928
## 4: SORD 3 0 0.1923077 Inf Inf
## 5: CCNA2 4 1 0.2657343 6.788517 456.287433
## ---
## 1772: ZNF814 7 7 1.0000000 0.000000 Inf
## 1773: ZNF831 4 4 1.0000000 1.000000 13.029360
## 1774: ZNF91 2 2 1.0000000 1.000000 19.262994
## 1775: ZSWIM6 2 1 1.0000000 2.255400 164.580622
## 1776: ZWINT 2 2 1.0000000 1.000000 19.262994
## ci.low adjPval
## 1: 0.72033908 1
## 2: 0.72033908 1
## 3: 0.00000000 1
## 4: 0.46148290 1
## 5: 0.41998181 1
## ---
## 1772: 0.00000000 1
## 1773: 0.07674974 1
## 1774: 0.05191301 1
## 1775: 0.09086265 1
## 1776: 0.05191301 1
Let’s plot a summary of each maf.
lapply(mafs, function(x){
plotmafSummary(x)
})
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
##
## [[9]]
## NULL
##
## [[10]]
## NULL
##
## [[11]]
## NULL
##
## [[12]]
## NULL
##
## [[13]]
## NULL
##
## [[14]]
## NULL
Let’s plot a summary of each cohort.
p <- plotmafSummary(primary_mafs, showBarcodes = T, textSize = 0.5)
p
## NULL
pdf('../figures/Strelka2_primaryMafSummary.pdf')
p
## NULL
dev.off()
## quartz_off_screen
## 2
p <- plotmafSummary(immortalized_mafs, showBarcodes = T, textSize = 0.5)
p
## NULL
pdf('../figures/Strelka2_immortalizedMafSummary.pdf')
p
## NULL
dev.off()
## quartz_off_screen
## 2
p <- lollipopPlot2(primary_mafs, immortalized_mafs, gene = 'NF1',m1_label = 'all', m2_label = 'all',labPosAngle = 45, labPosSize = 0.75, domainLabelSize = 0.5, m1_name = "Primary", m2_name = "Immortalized")
## HGNC refseq.ID protein.ID aa.length
## 1: NF1 NM_000267 NP_000258 2818
## 2: NF1 NM_001042492 NP_001035957 2839
## HGNC refseq.ID protein.ID aa.length
## 1: NF1 NM_000267 NP_000258 2818
## 2: NF1 NM_001042492 NP_001035957 2839
p
## $M1
## Domain_Start Domain_End Domain_Label Variant_Classification Conversion
## 1: NA NA <NA> Frame_Shift_Del N78Ifs*7
## 2: NA NA <NA> Frame_Shift_Del M643Ifs*45
## Position n_variants
## 1: 78 2
## 2: 643 1
##
## $M2
## Domain_Start Domain_End Domain_Label Variant_Classification Conversion
## 1: NA NA <NA> Frame_Shift_Del N78Ifs*7
## 2: NA NA <NA> Splice_Site X2235_splice
## 3: NA NA <NA> Frame_Shift_Del T2464Kfs*13
## Position n_variants
## 1: 78 1
## 2: 2235 1
## 3: 2464 1
pdf("../figures/DeepVariant_NF1_variants.pdf")
p
## $M1
## Domain_Start Domain_End Domain_Label Variant_Classification Conversion
## 1: NA NA <NA> Frame_Shift_Del N78Ifs*7
## 2: NA NA <NA> Frame_Shift_Del M643Ifs*45
## Position n_variants
## 1: 78 2
## 2: 643 1
##
## $M2
## Domain_Start Domain_End Domain_Label Variant_Classification Conversion
## 1: NA NA <NA> Frame_Shift_Del N78Ifs*7
## 2: NA NA <NA> Splice_Site X2235_splice
## 3: NA NA <NA> Frame_Shift_Del T2464Kfs*13
## Position n_variants
## 1: 78 1
## 2: 2235 1
## 3: 2464 1
dev.off()
## quartz_off_screen
## 2
merged_maf@data %>%
filter(Hugo_Symbol == "NF1") %>%
select(Tumor_Sample_Barcode, Variant_Classification, Variant_Type,
HGVSc, HGVSp_Short,
dbSNP_RS, SIFT, PolyPhen, IMPACT) %>%
set_names(c("Cell Line", "Variant Classification", "Variant Type",
"Genetic Change", "Protein Change", "dbSNP ID", "SIFT", "PolyPhen", "IMPACT")) %>%
write_csv("../figures/Strelka2_NF1_variants.csv")
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] synapser_0.15.35 forcats_0.5.2 stringr_1.4.1 dplyr_1.0.10
## [5] purrr_0.3.4 readr_2.1.3 tidyr_1.2.1 tibble_3.1.8
## [9] ggplot2_3.3.6 tidyverse_1.3.2 maftools_2.12.0
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.4 sass_0.4.2 bit64_4.0.5
## [4] vroom_1.6.0 jsonlite_1.8.2 splines_4.2.1
## [7] pack_0.1-1 modelr_0.1.9 bslib_0.4.0
## [10] assertthat_0.2.1 highr_0.9 googlesheets4_1.0.1
## [13] cellranger_1.1.0 yaml_2.3.5 pillar_1.8.1
## [16] backports_1.4.1 lattice_0.20-45 glue_1.6.2
## [19] digest_0.6.29 RColorBrewer_1.1-3 rvest_1.0.3
## [22] colorspace_2.0-3 htmltools_0.5.3 Matrix_1.5-1
## [25] pkgconfig_2.0.3 PythonEmbedInR_0.12.79 broom_1.0.1
## [28] haven_2.5.1 scales_1.2.1 tzdb_0.3.0
## [31] googledrive_2.0.0 generics_0.1.3 ellipsis_0.3.2
## [34] cachem_1.0.6 withr_2.5.0 cli_3.4.1
## [37] survival_3.4-0 magrittr_2.0.3 crayon_1.5.2
## [40] readxl_1.4.1 evaluate_0.16 fs_1.5.2
## [43] fansi_1.0.3 xml2_1.3.3 tools_4.2.1
## [46] data.table_1.14.2 hms_1.1.2 gargle_1.2.1
## [49] lifecycle_1.0.2 munsell_0.5.0 reprex_2.0.2
## [52] compiler_4.2.1 jquerylib_0.1.4 rlang_1.0.6
## [55] grid_4.2.1 rstudioapi_0.14 rmarkdown_2.16
## [58] DNAcopy_1.70.0 gtable_0.3.1 codetools_0.2-18
## [61] DBI_1.1.3 R6_2.5.1 lubridate_1.8.0
## [64] knitr_1.40 bit_4.0.4 fastmap_1.1.0
## [67] utf8_1.2.2 stringi_1.7.8 parallel_4.2.1
## [70] vctrs_0.4.2 dbplyr_2.2.1 tidyselect_1.1.2
## [73] xfun_0.33